PollyCommonR Tutorial

Link to PollyCommonR repo : pollycommonr

In [2]:
library(PollyCommonR)
In [6]:
# Load the raw peak-intensity table. check.names = FALSE keeps the sample
# column headers exactly as written in the CSV (they begin with digits).
raw_data <- read.csv(
  file = "data/demo_raw_intensity.csv",
  check.names = FALSE,
  stringsAsFactors = FALSE
)
head(raw_data, n = 6L)
A data.frame: 6 × 40
labelmetaGroupIdgroupIdgoodPeakCountmedMzmedRtmaxQualityisotopeLabelcompoundcompoundId066_neg_Cohort_3_6069_neg_Cohort_4_1070_neg_Cohort_4_2071_neg_Cohort_4_3072_neg_Cohort_4_4073_neg_Cohort_4_5074_neg_Cohort_4_6077_neg_Cohort_5_1078_neg_Cohort_5_2079_neg_Cohort_5_3
<chr><int><int><int><dbl><dbl><dbl><lgl><chr><chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1g1087108727174.09862.1410.726070NAStd-L-Phenylalanine Std-L-Phenylalanine1584187.251468345.50 1544608.501714852.62 1636158.38 1618905.881501471.381606435.121710179.121452017.38
2g 1 1 6337.05384.4820.839274NA1-(5'-Phosphoribosyl)-5-amino-4-imidazolecarboxamide C04677 96128.07 89643.53 76762.26 62934.05 77589.27 57863.31 77545.28 28954.86 30593.00 31275.40
3g 2 229362.05015.7820.837433NAGMP C00144 8600392.005157629.5010560783.009426739.0010523749.0014828464.008538355.00 791279.001119323.881543209.62
4g 6 611358.12523.6640.839334NA3-(3-Amino-3-carboxypropyl)-1-methylpseudouridine 193882 20047.80 7494.35 6160.37 5848.58 5607.81 5986.60 6793.59 10064.21 5370.90 10144.59
5g 8 819352.06393.0200.675565NA2,5-Diamino-6-(5-phospho-D-ribosylamino)pyrimidin-4(3H)-oneC01304 479267.84 102332.84 271826.59 342597.41 131523.53 157810.39 276580.16 133855.23 177058.45 326424.78
6g 9 918347.03955.1420.835507NAIMP C00130 36267.29 44338.24 75300.16 106004.64 133378.64 120469.48 83474.91 30340.93 48158.53 34707.48
In [9]:
# Load the sample annotation table (columns: Sample, Cohort).
metadata <- read.csv(
  file = "data/demo_metadata.csv",
  stringsAsFactors = FALSE
)
head(metadata, n = 6L)
A data.frame: 6 × 2
SampleCohort
<chr><chr>
1045_neg_Cohort_1_1Cohort_1
2046_neg_Cohort_1_2Cohort_1
3047_neg_Cohort_1_3Cohort_1
4048_neg_Cohort_1_4Cohort_1
5049_neg_Cohort_1_5Cohort_1
6053_neg_Cohort_2_1Cohort_2

Create Sample Matrix

In [66]:
# Build a row identifier of the form "<groupId>_<compound>" for each feature.
raw_data[["uniqueId"]] <- paste(
  raw_data[["groupId"]],
  raw_data[["compound"]],
  sep = "_"
)

# Assemble the feature-by-sample intensity matrix, keyed by uniqueId.
sample_raw_mat <- sample_intensity_matrix(
  raw_intensity_df = raw_data,
  metadata_df = metadata,
  rownames_col = "uniqueId"
)

# Add 1 to every intensity.
# NOTE(review): presumably a pseudocount so the later log2 step never sees a
# zero intensity -- confirm.
sample_raw_mat <- sample_raw_mat + 1
Make Sample Intensity Matrix Started...
Make Sample Intensity Matrix Completed...
In [67]:
# Preview the first six features of the intensity matrix.
head(sample_raw_mat, n = 6L)
A data.frame: 6 × 26
045_neg_Cohort_1_1046_neg_Cohort_1_2047_neg_Cohort_1_3048_neg_Cohort_1_4049_neg_Cohort_1_5053_neg_Cohort_2_1054_neg_Cohort_2_2055_neg_Cohort_2_3056_neg_Cohort_2_4057_neg_Cohort_2_5066_neg_Cohort_3_6069_neg_Cohort_4_1070_neg_Cohort_4_2071_neg_Cohort_4_3072_neg_Cohort_4_4073_neg_Cohort_4_5074_neg_Cohort_4_6077_neg_Cohort_5_1078_neg_Cohort_5_2079_neg_Cohort_5_3
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1087_Std-L-Phenylalanine 1567911.88 1440968.75 1562951.88 1647527.62 1885452.25 1484189.75 1308691.251665554.38 1563148.75 1485556.881584188.251468346.50 1544609.501714853.62 1636159.38 1618906.881501472.381606436.121710180.121452018.38
1_1-(5'-Phosphoribosyl)-5-amino-4-imidazolecarboxamide 79556.91 62094.95 66660.32 66934.00 56647.23 108301.38 99756.58 104860.88 98019.90 113850.31 96129.07 89644.53 76763.26 62935.05 77590.27 57864.31 77546.28 28955.86 30594.00 31276.40
2_GMP19532485.0019489651.0015856300.0020820675.0020047697.0012675280.0012175108.008954876.0011735648.0013169477.008600393.005157630.5010560784.009426740.0010523750.0014828465.008538356.00 791280.001119324.881543210.62
6_3-(3-Amino-3-carboxypropyl)-1-methylpseudouridine 29394.64 26366.08 18059.20 28485.74 37411.11 18316.81 17952.63 12225.86 31280.92 25987.82 20048.80 7495.35 6161.37 5849.58 5608.81 5987.60 6794.59 10065.21 5371.90 10145.59
8_2,5-Diamino-6-(5-phospho-D-ribosylamino)pyrimidin-4(3H)-one 236298.36 162778.95 279373.66 167854.88 364775.66 461588.50 319580.31 382526.62 449034.78 362122.84 479268.84 102333.84 271827.59 342598.41 131524.53 157811.39 276581.16 133856.23 177059.45 326425.78
9_IMP 93552.86 168591.88 67086.95 128707.02 155858.52 44962.14 50617.97 34853.89 58663.90 75660.60 36268.29 44339.24 75301.16 106005.64 133379.64 120470.48 83475.91 30341.93 48159.53 34708.48

Coefficient of Variation (CoV) Calculations

In [68]:
# Per-feature mean, standard deviation and CV within each cohort.
cov_cal_df <- calculate_cohortwise_cov(
  raw_matrix = sample_raw_mat,
  metadata = metadata,
  cohort_col = "Cohort"
)
Calculate Coefficient of Variation Started...
Calculate Coefficient of Variation Completed...
In [69]:
# Preview the first six rows of the cohort-wise CV table.
head(cov_cal_df, n = 6L)
A data.frame: 6 × 5
idcohortmeanstdcv
<chr><chr><dbl><dbl><dbl>
11087_Std-L-Phenylalanine Cohort_1 1620962.48 165255.77410.19492
21_1-(5'-Phosphoribosyl)-5-amino-4-imidazolecarboxamide Cohort_1 66378.68 8472.08512.76326
32_GMP Cohort_119149361.601917493.81310.01336
46_3-(3-Amino-3-carboxypropyl)-1-methylpseudouridine Cohort_1 27943.35 6932.22724.80814
58_2,5-Diamino-6-(5-phospho-D-ribosylamino)pyrimidin-4(3H)-oneCohort_1 242216.30 84077.94234.71193
69_IMP Cohort_1 122759.45 42379.19834.52215
In [70]:
# Static (non-interactive) boxplot built from the cohort-wise CV table.
p <- create_cohortwise_cov_boxplot(
  calculated_cov_df = cov_cal_df,
  interactive = FALSE
)
p
Create Coefficient of Variation Boxplot Started...
Create Coefficient of Variation Boxplot Completed...
In [71]:
# CV bar plot; id_order names the internal-standard feature, looked up in the
# "id" column of cov_cal_df.
p <- create_cohortwise_cov_barplot(
  calculated_cov_df = cov_cal_df,
  id_order = "1087_Std-L-Phenylalanine",
  id_col = "id"
)
p
Create Coefficient of Variation Boxplot Started...
Create Coefficient of Variation Boxplot Completed...

Pre Normalization

Boxplot on Raw Data

In [72]:
# Boxplot of the raw (pseudocount-adjusted) intensities.
p <- create_boxplot_on_matrix(
  sample_raw_mat = sample_raw_mat,
  x_label = "Sample",
  y_label = "Raw Intensity",
  title_label = "Boxplot on Raw Data"
)
p
Create Boxplot On Matrix Started...
Create Boxplot On Matrix Completed...

Density Plot on Raw Data

In [111]:
# Density plot of the raw (pseudocount-adjusted) intensity matrix.
p <- create_densityplot_on_matrix(sample_raw_mat)
p
Create Densityplot On Matrix Started...
Create Densityplot On Matrix Completed...

PCA on Raw Data

In [73]:
# Run PCA on the raw intensity matrix; the result feeds the PCA plots below.
pca_compute <- compute_pca(sample_raw_mat = sample_raw_mat)
Compute PCA Started...
Compute PCA Completed...
In [74]:
# Plot the proportion of variance from the raw-data PCA result.
plot_proportion_of_variance(PCAObj_Summary = pca_compute)
Make Proportion of Variance Plot Started...
Make Proportion of Variance Plot Started...
In [75]:
# Static 2D PCA plot of the raw data, with "Cohort" as the condition.
p <- plot_pca(
  pca_compute,
  metadata = metadata,
  condition = "Cohort",
  title_label = "PCA on Raw Data",
  interactive = FALSE
)
p
Plot PCA Started...
Plot PCA Completed...
In [97]:
# 3D PCA plot of the raw data on the first three principal components.
p <- plot_pca3d(
  pca_compute,
  metadata = metadata,
  condition = "Cohort",
  pc_x = 1,
  pc_y = 2,
  pc_z = 3,
  title_label = "PCA on Raw Data"
)
p
Plot PCA3D Started...
Plot PCA3D Completed...

Post Normalization

Normalization by Internal Standard

In [76]:
# Pull the internal-standard row and transpose it so that samples become rows
# and the standard's intensity is the single column.
internal_standard_id <- "1087_Std-L-Phenylalanine"
norm_agent <- t(sample_raw_mat[internal_standard_id, ])
head(norm_agent, n = 6L)
A matrix: 6 × 1 of type dbl
1087_Std-L-Phenylalanine
045_neg_Cohort_1_11567912
046_neg_Cohort_1_21440969
047_neg_Cohort_1_31562952
048_neg_Cohort_1_41647528
049_neg_Cohort_1_51885452
053_neg_Cohort_2_11484190
In [81]:
# Scale the matrix using column 1 of norm_agent (the internal standard).
# NOTE(review): presumably each sample is divided by its own standard
# intensity -- confirm against the package documentation.
norm_mat <- normalize_by_scaling_factor(
  sample_raw_mat,
  normalization_agent = norm_agent,
  scaling_factor_col = 1
)
Normalize By Sample Factor Started...
Normalize By Sample Factor Completed...

Log2 Transformation

In [82]:
# Log2-transform the normalized intensities.
log2_norm_mat <- log2(norm_mat)

# Shift every value up by the largest absolute log2 value so that the whole
# matrix is non-negative. na.rm = TRUE stops a single missing intensity from
# turning the shift, and with it the entire matrix, into NA.
log2_shift <- max(abs(log2_norm_mat), na.rm = TRUE)
log2_norm_mat_shift <- log2_norm_mat + log2_shift
In [86]:
# Preview the first six features of the shifted log2 matrix.
head(log2_norm_mat_shift, n = 6L)
A data.frame: 6 × 26
045_neg_Cohort_1_1046_neg_Cohort_1_2047_neg_Cohort_1_3048_neg_Cohort_1_4049_neg_Cohort_1_5053_neg_Cohort_2_1054_neg_Cohort_2_2055_neg_Cohort_2_3056_neg_Cohort_2_4057_neg_Cohort_2_5066_neg_Cohort_3_6069_neg_Cohort_4_1070_neg_Cohort_4_2071_neg_Cohort_4_3072_neg_Cohort_4_4073_neg_Cohort_4_5074_neg_Cohort_4_6077_neg_Cohort_5_1078_neg_Cohort_5_2079_neg_Cohort_5_3
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1087_Std-L-Phenylalanine20.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.8464820.84648
1_1-(5'-Phosphoribosyl)-5-amino-4-imidazolecarboxamide16.5457716.3100616.2951816.2250615.7897217.0699317.1329116.8570316.8512417.1406916.8038516.8126516.5158016.0784016.4481916.0402816.5713015.0526115.0417215.30964
2_GMP24.4854424.6040824.1891924.5061224.2569323.9407524.0642223.2731523.7548523.9946023.2871422.6590023.6198823.3051523.5317424.0417623.3540619.8248820.2349620.93435
6_3-(3-Amino-3-carboxypropyl)-1-methylpseudouridine15.1093315.0742714.4110814.9925615.1911814.5061214.6586913.7565615.2034515.0094514.5424013.2325012.8767012.6509412.6580812.7676513.0587013.5281412.5319813.68542
8_2,5-Diamino-6-(5-phospho-D-ribosylamino)pyrimidin-4(3H)-one18.1163217.7004318.3624717.5514618.4766519.1614818.8126018.7241119.0469218.8100319.1216417.0036418.3400018.5229817.2095717.4877318.4058817.2613717.5746418.69325
9_IMP16.7795617.7510516.3043817.1683417.2498815.8016616.1541515.2679416.1106416.5511615.3975915.7970116.4880516.8306117.2297817.0982116.6776015.1200715.6963015.45985

Boxplot on Normalized Data

In [91]:
# Boxplot of the normalized, shifted log2 intensities.
p <- create_boxplot_on_matrix(
  sample_raw_mat = log2_norm_mat_shift,
  x_label = "Sample",
  y_label = "Normalized Intensity",
  title_label = "Boxplot on Normalized Data"
)
p
Create Boxplot On Matrix Started...
Create Boxplot On Matrix Completed...

Density Plot on Normalized Data

In [112]:
# Density plot of the normalized, shifted log2 intensity matrix.
p <- create_densityplot_on_matrix(log2_norm_mat_shift)
p
Create Densityplot On Matrix Started...
Create Densityplot On Matrix Completed...

PCA on Normalized Data

In [92]:
# Run PCA on the normalized, shifted log2 matrix.
norm_pca_compute <- compute_pca(sample_raw_mat = log2_norm_mat_shift)
Compute PCA Started...
Compute PCA Completed...
In [93]:
# Plot the proportion of variance from the normalized-data PCA result.
plot_proportion_of_variance(PCAObj_Summary = norm_pca_compute)
Make Proportion of Variance Plot Started...
Make Proportion of Variance Plot Started...
In [94]:
# Static 2D PCA plot of the normalized data, with "Cohort" as the condition.
p <- plot_pca(
  norm_pca_compute,
  metadata = metadata,
  condition = "Cohort",
  title_label = "PCA on Normalized Data",
  interactive = FALSE
)
p
Plot PCA Started...
Plot PCA Completed...
In [99]:
# 3D PCA plot of the normalized data on the first three principal components.
p <- plot_pca3d(
  norm_pca_compute,
  metadata = metadata,
  condition = "Cohort",
  pc_x = 1,
  pc_y = 2,
  pc_z = 3,
  title_label = "PCA on Normalized Data"
)
p
Plot PCA3D Started...
Plot PCA3D Completed...

PCA on Filtered Normalized Data

In [104]:
# Keep only the metadata rows belonging to the three selected cohorts.
filtered_metadata <- filter_metadata_by_cohorts(
  metadata,
  condition = "Cohort",
  selected_cohorts = c("Cohort_2", "Cohort_5", "Cohort_3")
)

# Sample names are in the first metadata column. drop = FALSE keeps the result
# a data frame even when the filter leaves a single sample; without it the
# subset would collapse to a bare vector and lose its row names.
selected_samples <- filtered_metadata[, 1]
filtered_log2_norm_mat_shift <-
  log2_norm_mat_shift[, selected_samples, drop = FALSE]
Filter Metadata Started...
Filter Metadata Completed...
In [106]:
# Run PCA on the samples of the selected cohorts only.
filtered_pca_compute <- compute_pca(
  sample_raw_mat = filtered_log2_norm_mat_shift
)
Compute PCA Started...
Compute PCA Completed...
In [108]:
# Static 2D PCA plot of the cohort-filtered data. The PCA was computed on the
# filtered samples only, so the matching filtered metadata is passed, and the
# title says "Filtered" to distinguish this plot from the unfiltered
# "PCA on Normalized Data" plot.
p <- plot_pca(
  filtered_pca_compute,
  metadata = filtered_metadata,
  condition = "Cohort",
  title_label = "PCA on Filtered Normalized Data",
  interactive = FALSE
)
p
Plot PCA Started...
Plot PCA Completed...

Samplewise bar plot for single metabolite

In [119]:
# Per-sample bar plot of the shifted log2 intensities for feature "2_GMP".
p <- create_samplewise_barplot(
  log2_norm_mat_shift,
  metadata,
  id_name = "2_GMP",
  cohort_col = "Cohort",
  x_label = "Sample",
  y_label = "Normalized Intensity",
  title_label = "GMP"
)
p
Create Samplewise Barplot Started...
Create Samplewise Barplot Started...

Differential Expression Analysis

In [123]:
# Limma-based differential expression between Cohort_2 and Cohort_1.
# NOTE(review): log_flag = FALSE presumably reflects that the input is already
# log2-transformed -- confirm against the package documentation.
diff_exp <- diff_exp_limma(
  prot_norm_mat = log2_norm_mat_shift,
  metadata,
  cohort_condition = "Cohort",
  cohort_a = "Cohort_2",
  cohort_b = "Cohort_1",
  p_val_correct_methods = "BH(FDR)",
  log_flag = FALSE
)
Calculate Differential Expression Limma Started...
Loading required package: limma
Calculate Differential Expression Limma Completed...
In [130]:
# Static volcano plot of the limma results with a 0.05 p-value cutoff and a
# log2 fold-change range of 0.
p <- plot_volcano_from_limma(
  diff_exp_rdesc = diff_exp,
  log2fc_range = 0,
  p_val_cutoff = 0.05,
  interactive = FALSE
)
p
Make Volcano Plot Started...
not significant     significant 
            367             188 
Make Volcano Plot Completed...

Perform ANOVA Test

In [131]:
# One-way ANOVA per feature across the groups in the "Cohort" column.
log2_norm_mat_shift_anova <- one_way_anova_on_matrix(
  log2_norm_mat_shift,
  metadata_df = metadata,
  cohort_col = "Cohort"
)
One Way Anova On Matrix Started...
One Way Anova On Matrix Completed...
In [132]:
# Preview the first six rows of the ANOVA table (id, F.Value, P.Value).
head(log2_norm_mat_shift_anova, n = 6L)
A data.frame: 6 × 3
idF.ValueP.Value
<chr><dbl><dbl>
11087_Std-L-Phenylalanine 1.0600964.007126e-01
21_1-(5'-Phosphoribosyl)-5-amino-4-imidazolecarboxamide 42.4593748.934000e-10
32_GMP 61.0085792.928969e-11
46_3-(3-Amino-3-carboxypropyl)-1-methylpseudouridine 29.3352092.507417e-08
58_2,5-Diamino-6-(5-phospho-D-ribosylamino)pyrimidin-4(3H)-one10.6989436.893525e-05
69_IMP 9.2974071.735864e-04
In [ ]: